

clear all

* ---------------------------------------------------------------------------- *
* Step 1: Load the scraped loan data and restrict the sample
* ---------------------------------------------------------------------------- *

* Read the CSV located in the input folder (global macro set outside this file)
import delimited "$input/scraped_data.csv"

* Force every variable name to lowercase so later references are uniform
rename *, lower

* Retain a loan only when BOTH conditions hold:
*   - its status2 field equals "reimbursing"
*   - its SIREN identifier is non-empty
keep if status2 == "reimbursing" & siren != ""

* Platforms left out of the main analysis
* (platforms not offering regular loans or specific industries)
local excluded_platforms enerfip myoptions investbook lendosphere ///
                         solylend wesharebonds gwenneg pretgo pretstory

* Remove the observations of each excluded platform, one platform at a time
foreach plat of local excluded_platforms {
    drop if platform == "`plat'"
}

* ---------------------------------------------------------------------------- *
* Step 2: Build date variables, restrict the period, make loan terms numeric
* ---------------------------------------------------------------------------- *

* Parse the string variable "beginning" as a day-month-year date;
* strings that cannot be parsed yield a missing date
gen date = date(beginning, "DMY")

* Calendar year of that date (missing whenever date is missing)
gen year = year(date)

* Restrict the sample to 2016-2019 inclusive; inrange() is false for a
* missing year, so observations without a usable date are dropped as well
keep if inrange(year, 2016, 2019)

* Turn amount, duration and rate into numeric variables in one pass.
* With "force", any value containing non-numeric characters becomes missing
* instead of stopping the conversion.
destring amount duration rate, replace force

* ---------------------------------------------------------------------------- *
* Step 3: Aggregate data by platform
* ---------------------------------------------------------------------------- *

* Collapse to one observation per platform:
*   amount, duration, rate : means over the platform's loans
*                            (missing values are ignored by collapse)
*   volume                 : total amount lent on the platform.
* volume must be the sum of amount (not of duration): it is the basis of the
* market shares computed below and of the bar ordering in every figure.
collapse (mean) amount duration rate (sum) volume = amount, by(platform)

* Total lending volume across all remaining platforms (constant across rows)
egen total = total(volume)

* Each platform's market share, in percent of the total volume
gen share = volume / total * 100

* Display name: capitalise the first letter of each word in the platform name
gen Platform = strproper(platform)
* strproper() also capitalises the letter after the dot; restore the intended
* spelling for this one platform
replace Platform = "Credit.fr" if Platform == "Credit.Fr"

* Negated share: sorting on it in ascending order lists the platforms from the
* largest to the smallest market share
gen m_share = -share

* ---------------------------------------------------------------------------- *
* Step 4: Generate and Export Graphs (Figures B2)
* ---------------------------------------------------------------------------- *

* One horizontal bar chart per statistic, all with identical styling and with
* platforms ordered by descending market share (ascending m_share).
* The two lists below are parallel: the i-th variable is written to the i-th
* file stem.
*   share    -> market_share  (market share, % of total volume)
*   amount   -> avg_amount    (average loan amount)
*   duration -> avg_duration  (average loan duration)
*   rate     -> avg_rate      (average interest rate)
local plot_vars  share amount duration rate
local file_stems market_share avg_amount avg_duration avg_rate

local n_plots : word count `plot_vars'

forvalues i = 1/`n_plots' {
    local yvar : word `i' of `plot_vars'
    local stem : word `i' of `file_stems'

    graph hbar `yvar', over(Platform, gap(*1) sort(m_share)) graphregion(color(white)) ///
        bar(1, bcolor(emerald)) bar(2, bcolor(gs10%40) lcolor(black)) ///
        ylabel(, nogrid labsize(medium)) ytitle("")

    * Forward slashes are used as the directory separator: Stata accepts them
    * on Windows, macOS and Linux, whereas backslashes only work on Windows.
    graph export "$figures/`stem'.pdf", as(pdf) replace
    graph save Graph "$figures/`stem'.gph", replace
}
